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^^4J I g 



#!/usr/local/bin/perl

# COPYRIGHT (c) 1998 Bell Communications Research Inc., 

# All Rights Reserved. 
# 

# PROPRIETARY - BELLCORE AND AUTHORIZED CLIENTS ONLY. 
# 

# This document contains proprietary information that shall 

# be distributed or routed only within Bell Communications 

# Research (Bellcore), and its authorized clients, except 

# with written permission of Bellcore. 
# 

# $Id: getCovar.pl,v 1.2 1999/05/25 15:33:47 rmartija Exp rmartija $
# 

undef;    # NOTE(review): a no-op as written; its operand appears to have been lost -- confirm.

# Getopt::Std is the core replacement for the Perl 4 'getopts.pl' library,
# which is no longer shipped with perl. getopts() sets $opt_d / $opt_D.
use Getopt::Std;

# Supplies getMeans, getCovarianceMatrix and getInverseMatrix.
# NOTE(review): path reconstructed from a damaged listing -- confirm.
require '/u/rmartija/netsizer/scripts/math.pl';

$USAGE = "Usage: " . $0 . " [-D] -d domain file\n\n" .
         "Options:\n" .
         "   -D          debug mode\n" .
         "   -d domain   domain type (1=US, 2=Non-US)\n" .
         "   file        name of input file. The default is STDIN.\n\n" .
         "Examples:\n" .
         "   $0 ../data/test.out\n" .
         "   $0 -d 1 ../data/test.out\n" .
         "   $0 -D ../data/test.out\n" .
         "   $0 -D -d2 ../data/test.out\n\n";

#
# printMatrix( $title, \%cells, $n )
#
# Print $title followed by an $n x $n matrix and a blank line. The matrix is
# stored flat in %cells under 1-based keys: element (row i, column j) lives
# at key  j + (i - 1) * n.
#
sub printMatrix {
    my( $title, $cells, $n ) = @_;

    print "$title\n";
    for my $i ( 1 .. $n ) {
        for my $j ( 1 .. $n ) {
            printf "%12.2f", $cells->{ $j + ( ($i - 1) * $n ) };
            print( $j < $n ? " " : "\n" );
        }
    }
    print "\n";
}

#
# printLocaleReport( $loc, $nrows, $ncols )
#
# Emit the block for one locale: a "<domain>: <locale>" header, the MEAN
# line, and the inverse of the covariance matrix. With -D the original data
# and the covariance matrix are printed as well.
#
# The statistics come from the math.pl routines, which take their matrices
# as typeglobs; that is why %matrix, %m, %S and %I are package globals.
#
sub printLocaleReport {
    my( $loc, $nrows, $ncols ) = @_;

    %m = &getMeans( $nrows, $ncols, *matrix );
    print "$domain: $loc\n";
    print "MEAN: ";
    for my $i ( 1 .. $ncols ) {
        printf "%.2f", $m{$i};
        print( $i < $ncols ? " " : "\n" );
    }

    # NOTE(review): as in the original listing, only the first $ncols rows of
    # the data are dumped here, not all $nrows -- confirm that is intended.
    &printMatrix( "ORIGINAL MATRIX:", \%matrix, $ncols ) if( $opt_D );

    %S = &getCovarianceMatrix( $nrows, $ncols, *matrix, *m );
    &printMatrix( "COVARIANCE MATRIX:", \%S, $ncols ) if( $opt_D );

    %I = &getInverseMatrix( $ncols, *S );
    &printMatrix( "INVERSE OF COVARIANCE MATRIX:", \%I, $ncols );
}

######################### main program #########################

getopts( 'd:D' ) or die "$USAGE\n";
die "$USAGE\n" unless $opt_d && $opt_d >= 1 && $opt_d <= 2;

$domain = ( $opt_d == 1 ) ? 'US' : 'NONUS';

$oldLoc = '';
$rows   = 0;
$cols   = 0;

die "$USAGE\n" if( $#ARGV > 0 );

# NOTE(review): the literal compared against $ARGV[0] was lost in the
# listing; '-' (the usual spelling for "read STDIN") is assumed -- confirm.
$useStdin = ( $#ARGV < 0 || $ARGV[0] eq '-' );

if( $useStdin ) {
    $INPUT = \*STDIN;
}
else {
    die "ERROR: cannot open $ARGV[0]\n" unless -r $ARGV[0];
    open( $INPUT, '<', $ARGV[0] )
        or die "ERROR: cannot open $ARGV[0]: $!\n";
}

# Each input line is "<locale>\t<v1>\t<v2>...". Consecutive lines with the
# same locale form the rows of one matrix; a change of locale flushes the
# report for the previous one.
while( <$INPUT> ) {
    chomp;
    next unless length($_) > 0;

    @tokens = split( /\t/, $_ );
    $locale = $tokens[0];

    if( $locale ne $oldLoc ) {
        &printLocaleReport( $oldLoc, $rows - 1, $cols ) if( $oldLoc ne '' );

        $oldLoc = $locale;
        $rows   = 1;
        $cols   = @tokens - 1;
    }

    for( $j = 1; $j <= $cols; $j++ ) {
        $matrix{$j + ( ($rows - 1) * $cols )} = $tokens[$j] * 1.0;
    }

    # $rows always points at the next free row, hence the "$rows - 1" passed
    # to printLocaleReport. (This increment was missing from the damaged
    # listing; without it every line would overwrite row 1.)
    $rows++;
}

close( $INPUT ) unless $useStdin;

# Flush the last locale. Skipped when the input held no data at all.
&printLocaleReport( $oldLoc, $rows - 1, $cols ) if( $oldLoc ne '' );
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\ \ /usr/ local/bin/perl 
# COPYRIGHT (c) 1998 Telcordia Technologies Inc.,
# All Rights Reserved.
#
# PROPRIETARY - BELLCORE AND AUTHORIZED CLIENTS ONLY.
#
# This document contains proprietary information that shall
# be distributed or routed only within Telcordia Technologies
# (Telcordia), and its authorized clients, except
# with written permission of Telcordia.
#
# $Id: getHostLoc.pl,v 1.1 1999/05/20 22:27:07 rmartija Exp rmartija $
#



# Getopt::Std is the core replacement for the Perl 4 'getopts.pl' library,
# which is no longer shipped with perl. getopts() sets $opt_u/$opt_m/$opt_D.
use Getopt::Std;

undef;    # NOTE(review): a no-op as written; its operand appears to have been lost -- confirm.

# Usage text printed when the command line is invalid.
$USAGE = "Usage: " . $0 . " [-D] -u file -m file\n" .
         "Flags:\n" .
         "   -D         debug mode\n" .
         "   -u file    file containing the list of unclassified IP\n" .
         "              addresses (i.e. those with unknown locations)\n" .
         "              and their characteristics.\n" .
         "   -m file    file containing the means and inverse of covariance\n" .
         "              matrices\n" .
         "Examples:\n" .
         "   $0 -u unknowns -m matrix\n";

# Shared state, filled in by readMeansAndMatrices and the main program.
our %g_means      = ();   # locale => reference to its list of means
our %g_inverse    = ();   # locale => reference to a list of matrix rows
our @g_locales    = ();   # every locale present in %g_means
our $g_debug      = 0;    # set when -D is given
our $g_attributes = 0;    # highest attribute index (number of attributes - 1)



# 

# " " """ 

#
# getDistance( $loc, \@data )
#
# Return the quadratic form  d' * S * d  where d = mean($loc) - @data and
# S is the matrix stored for $loc in %g_inverse (a list of row references).
# Means come from %g_means; indices 0 .. $g_attributes are used.
#
# Dies if $loc has no entry in %g_means / %g_inverse.
#
sub getDistance {
    my( $loc, $data ) = @_;

    die "ERROR: no means/matrix loaded for locale '$loc'\n"
        unless $g_means{$loc} && $g_inverse{$loc};

    my @X     = @$data;
    my @mu    = @{ $g_means{$loc} };
    my @sigma = @{ $g_inverse{$loc} };
    my( @diff, @prod );

    for my $i ( 0 .. $g_attributes ) {
        $diff[$i] = $mu[$i] - $X[$i];
    }

    #
    # compute diff(transpose) * sigma. diff(transpose) is a 1 x N matrix
    # and sigma is a N x N matrix, the result is a 1 x N matrix.
    # (Row i of sigma is used where column i would be expected; the two
    # agree whenever sigma is symmetric.)
    #
    for my $i ( 0 .. $g_attributes ) {
        $prod[$i] = 0.;
        for my $j ( 0 .. $g_attributes ) {
            $prod[$i] += $diff[$j] * $sigma[$i][$j];
        }
    }

    #
    # multiply the matrix obtained above, i.e. prod, with diff. prod is a
    # 1 x N matrix and diff is a N x 1 matrix, the result is a scalar.
    #
    my $dist = 0;
    for my $i ( 0 .. $g_attributes ) {
        $dist += $prod[$i] * $diff[$i];
    }

    return $dist;
}



# 

# - — 

#
# readMeansAndMatrices( $file )
#
# Parse a matrix file made of blocks of the form
#
#     US: <state>              (or  NONUS: <country>)
#     MEAN: m0 m1 ... mN
#     INVERSE ...:
#     <N+1 rows of N+1 whitespace-separated numbers>
#
# For every block the means are stored in $g_means{$locale} and the matrix
# rows in $g_inverse{$locale}; @g_locales receives all locale keys. US
# locales are keyed as "<state>,US".
#
# Returns the highest mean index of the last block read (number of means - 1).
# Dies if the file cannot be opened or is structurally invalid.
#
sub readMeansAndMatrices {
    my( $file ) = @_;

    open( my $fh, '<', $file )
        or die "ERROR: cannot open $file: $!\n";

    # $n_rows counts the matrix rows still expected; negative means "none".
    my( $n_rows, $cur_row, $line_num ) = ( -1, 0, 0 );
    my( $cur_loc, $n_means );

    while( my $line = <$fh> ) {
        chomp( $line );
        $line_num++;

        next if $line =~ /^\s*$/;    # skip blank lines

        my $corrupt = "ERROR: $file is corrupted\n-> line $line_num: $line\n";

        if( $line =~ /^US.*:\s*(.*)/ ) {
            die $corrupt unless $n_rows < 0;

            # $1 contains the state string (e.g. NJ)
            $cur_loc = "$1,US";
            $cur_row = 0;
        }
        elsif( $line =~ /^NONUS.*:\s+(.*)/ ) {
            die $corrupt unless $n_rows < 0;

            # $1 contains the country string (e.g. BE)
            # NOTE(review): the right-hand side was lost in the listing;
            # the bare country string is assumed -- confirm.
            $cur_loc = $1;
            $cur_row = 0;
        }
        elsif( $line =~ /^MEAN.*:\s*(.*)/ ) {
            die $corrupt unless $n_rows < 0;

            # $1 contains something like 18.43 1130.71 20.00 170.71 19.57 228.5
            my @means = split( ' ', $1 );

            $n_means = $n_rows = $#means;
            $g_means{$cur_loc} = \@means;
        }
        elsif( $line =~ /^INVERSE.*:\s*(.*)/ ) {
            die $corrupt unless $cur_row == 0;
        }
        elsif( $line =~ /^([A-Za-z]+).*:/ ) {
            die "ERROR: Invalid Tag in $file\n-> line $line_num: $line\n";
        }
        else {
            my @row = split( ' ', $line );

            # make sure the matrix is a $n_means X $n_means array, and that
            # a MEAN line has actually been seen for this block
            die $corrupt
                unless defined($n_means)
                    && $#row == $n_means
                    && $cur_row <= $n_means;

            push( @{ $g_inverse{$cur_loc} }, [@row] );

            $cur_row++;
            $n_rows--;
        }
    }

    close( $fh );

    die "ERROR: $file is corrupted. More data expected.\n" unless $n_rows < 0;

    @g_locales = keys %g_means;
    return $n_means;
}

# — ™ 

# ' 

#
# classifyIPs( $file )
#
# Read $file line by line. Every line that contains a dotted-quad address is
# split on whitespace into an IP followed by its attribute values; lines
# whose attribute count does not match $g_attributes are skipped. For each
# remaining line the distance to every locale in @g_locales is computed with
# getDistance and "<ip> <closest locale>" is printed. With $g_debug set, all
# distances are printed first.
#
# Dies if $file cannot be opened.
#
sub classifyIPs {
    my( $file ) = @_;

    open( my $fh, '<', $file )
        or die "ERROR: cannot open $file: $!\n";

    while( my $line = <$fh> ) {
        next unless $line =~ /\d+\.\d+\.\d+\.\d+/;

        my( $ip, @data ) = split( ' ', $line );
        next unless $#data == $g_attributes;

        # Start each IP with no winner. The original seeded the minimum with
        # time(), so any distance above that value was ignored and the
        # previous IP's locale was reported instead.
        my( %dist, $loc, $min );

        foreach my $tloc ( @g_locales ) {
            $dist{$tloc} = getDistance( $tloc, \@data );
            if( !defined($min) || $dist{$tloc} < $min ) {
                $min = $dist{$tloc};
                $loc = $tloc;
            }
        }

        if( $g_debug ) {
            foreach my $key ( sort keys %dist ) {
                printf "%-15s %-8s %7.2f\n", $ip, $key, $dist{$key};
            }
        }

        # $loc is undefined only when no locales are loaded at all.
        printf "%-15s %-8s\n", $ip, ( defined($loc) ? $loc : '' );
    }

    close( $fh );
}



#############«»######^^ 

####################### main program 
### ######*###^ 

# Parse the command line: -u <unknowns file>, -m <matrix file>, optional -D.
# Getopt::Std is the core replacement for the Perl 4 'getopts.pl' library.
use Getopt::Std;

getopts( 'u:m:D' ) or die "$USAGE\n";

die "$USAGE\n" unless ( $opt_u && $opt_m );

# Both files are only read, so test readability rather than mere existence.
die "ERROR: cannot open $opt_u\n" unless -r $opt_u;
die "ERROR: cannot open $opt_m\n" unless -r $opt_m;

$g_debug = 1 if( $opt_D );

# Load the per-locale statistics, then classify every unknown address.
$g_attributes = readMeansAndMatrices( $opt_m );
classifyIPs( $opt_u );
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# I /usr/ local/bin/perl 

# COPYRIGHT (c) 1998 Bell Communications Research Inc., 

# All Rights Reserved. 
# 

# PROPRIETARY - BELLCORE AND AUTHORIZED CLIENTS ONLY. 
# 

# This document contains proprietary information that shall 

# be distributed or routed only within Bell Communications 

# Research (Bellcore), and its authorized clients, except 

# with written permission of Bellcore. 
# 

# $Id: classify.pl,v 1.1 1999/05/05 13:20:39 rmartija Exp $
# 



undef;    # NOTE(review): a no-op as written; its operand appears to have been lost -- confirm.

# Getopt::Std is the core replacement for the Perl 4 'getopts.pl' library,
# which is no longer shipped with perl. getopts() sets the $opt_X variables.
use Getopt::Std;

# Usage text printed when the command line is invalid.
$USAGE = "Usage: " . $0 . " [-D] -p path -d file -t type -h file\n\n" .
         "Options:\n" .
         "   -D        debug mode\n" .
         "   -p path   output directory\n" .
         "   -d file   name of domains file\n" .
         "   -t type   domain type (1=US only, 2=Non-US only, 3=Global)\n" .
         "   -h file   name of hosts file\n\n" .
         "Example:\n" .
         "   $0 -p ../data/local -d alldomains.lcl -t 1 -h dat.txt\n" .
         "   $0 -D -p ../data/national -d alldomains.nat -t 2 -h dat.txt\n" .
         "   $0 -p ../data/non-us -d alldomains.nus -t 3 -h dat.txt\n\n";



# 

# - - " 

#
# prompt( $msg, $choices, $default, $nocase )
#
# Ask a question on STDERR and return the line typed on STDIN.
#
#   $msg      question text
#   $choices  optional list of choices, shown as " ($choices)"
#   $default  optional default, shown as "[$default] " and returned when
#             the reply is empty
#   $nocase   when true, the reply is upper-cased before it is returned
#
# The whole prompt goes to STDERR. The original wrote the message to STDERR
# and the rest to STDOUT, whose buffering could hold back the "? [default]"
# part until after the user had answered. End-of-file on STDIN is treated as
# an empty reply.
#
sub prompt {
    my( $msg, $choices, $default, $nocase ) = @_;

    my $text = $msg;
    $text .= " ($choices)"  if( $choices );
    $text .= "? ";
    $text .= "[$default] "  if( $default );
    print STDERR $text;

    my $reply = <STDIN>;
    $reply = '' unless defined $reply;
    chomp( $reply );

    $reply =~ tr/a-z/A-Z/ if( $nocase );
    $reply = $default     if( length($reply) == 0 && $default );

    return $reply;
}



# 

# 

#
# overwrite( $file )
#
# Decide whether $file may be written. Returns 1 when $file does not exist,
# or when it exists and the user answers "Y" to the overwrite question asked
# through prompt(); returns 0 otherwise.
#
sub overwrite {
    my( $file ) = @_;

    # Nothing to overwrite (-e is also true for directories).
    return 1 unless -e $file;

    my $msg = "$file exists. Overwrite";

    # NOTE(review): the default answer was unreadable in the listing;
    # "Y" is assumed -- confirm.
    return 1 if( prompt( $msg, "Y/N", "Y", 1 ) eq "Y" );

    return 0;
}



######################### main program #########################

# Parse the command line: -d <domains file>, -h <hosts file>, -p <output
# directory>, -t <domain type>, optional -D.
# Getopt::Std is the core replacement for the Perl 4 'getopts.pl' library.
use Getopt::Std;
use File::Basename;

getopts( 'd:h:p:t:D' ) or die "$USAGE\n";

die "ERROR: cannot open $opt_d\n" unless -r $opt_d;
die "ERROR: cannot open $opt_h\n" unless -r $opt_h;
die "ERROR: domain type not specified\n$USAGE\n" unless $opt_t;

die "ERROR: invalid domain type ($opt_t)\n$USAGE\n"
    unless ( int($opt_t) >= 1 && int($opt_t) <= 3 );

# Every output path is built as "$opt_p/...", so an empty -p would point at
# the filesystem root.
die "ERROR: output directory not specified\n$USAGE\n" unless $opt_p;

# Types 1 and 2 collect every match in one file named after the domains
# file. basename() replaces the original `basename $opt_d` shell call.
$basket = basename( $opt_d );

open( F, '<', $opt_d ) or die "ERROR: cannot open $opt_d: $!\n";
@domains = <F>;
close( F );

# Start from an empty basket; with type 3 each domain gets its own file.
unlink "$opt_p/$basket" unless $opt_t == 3;
foreach $domainName (©domains) { 
chop ( $domainName ) ; 

$cmd s "g r ep V\\,$domainName\$V $opt_h | cut -d\":\ M -fl " ; 

if( $opt_t == 3 ) { 

$emd~\» *> $opt_j3/$domainKame n ; 

} 

else { 

$cmd .= "» $opt _p/$basket" ; 

} 

"$cmd fc ; 
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